##################################################
# Replication Code
# Taeyong Park and Andrew Reeves
# "Local Unemployment and Voting for President: Uncovering Causal Mechanisms"
# Summary: Recoding and Data Analysis for 2016
##################################################


rm(list = ls())
library(foreign)
library(Amelia)
library(stringr)
library(R2WinBUGS);library(rjags);library(R2jags);library(coda);library(superdiag); library(MCMCpack)
library(abind)

data = read.dta("data16Imputed1.dta") 

# Evaluations
data$VBad = rep(0, nrow(data))
data$VBad[data$natecon5==5] = 1
data$Bad = rep(0, nrow(data))
data$Bad[data$natecon5==4] = 1
data$Good = rep(0, nrow(data))
data$Good[data$natecon5==2] = 1
data$VGood = rep(0, nrow(data))
data$VGood[data$natecon5==1] = 1

# Age
data$younger30 = rep(0, nrow(data))
data$younger30[data$age<30] = 1
data$older65 = rep(0, nrow(data))
data$older65[data$age>=65] = 1

# Race
data$black = rep(0, nrow(data))
data$black[data$raceNew==1]=1
data$latino = rep(0, nrow(data))
data$latino[data$raceNew==2]=1

# Employment status
data$fullTime = rep(0, nrow(data))
data$fullTime[data$employment == 1] = 1 
data$partTime = rep(0, nrow(data))
data$partTime[data$employment == 2] = 1 
data$unemployed = rep(0, nrow(data))
data$unemployed[data$employment == 3] = 1 

# Education
data$noHighSchool = rep(0, nrow(data))
data$noHighSchool[data$educNew==4] = 1
data$HighSchool = rep(0, nrow(data))
data$HighSchool[data$educNew==3] = 1
data$someCollege = rep(0, nrow(data))
data$someCollege[data$educNew==1] = 1
data$fourCollege = rep(0, nrow(data))
data$fourCollege[data$educNew==2] = 1

# Party ID
data$Rep = rep(0, nrow(data))
data$Rep[data$party3==3] = 1
data$Ind = rep(0, nrow(data))
data$Ind[data$party3==2] = 1
data$Dem = rep(0, nrow(data))
data$Dem[data$party3==1] = 1

# News interest
data$Very=ifelse(data$newsInt==4, 1, 0)
data$Some=ifelse(data$newsInt==3, 1 ,0)
data$Few=ifelse(data$newsInt==1 | data$newsInt==2, 1, 0)

# Unemp
data$avgUnemp15 = apply(data[,5:16], 
                        FUN=mean, MARGIN=1)
data$avgUnemp16 = apply(data[,17:28], 
                        FUN=mean, MARGIN=1)
data$chgUnemp1615=data$avgUnemp16-data$avgUnemp15

# Income
data$chgInc1615=(data$inc16-data$inc15)/1000
data$inc16 = data$inc16/1000



########################################
#
# MEDIATION ANALYSIS STEP 1 and STEP2
# A part of the code below is from Becher and Donnelly (2013)
# 
########################################


######## step1Model1 #######
attach(data)
y = data$natecon5
N = nrow(data)
Nstate = length(unique(data$stateID)) 

Ncat = 5


data.jags = list("y"=y, "stateID"=stateID, "chgUnemp1615"= chgUnemp1615, "avgUnemp16" = avgUnemp16,"chgInc1615"=chgInc1615, "inc16"=inc16,
                 "younger30"=younger30, "older65"=older65, "female"=female, "black"=black, "latino"=latino, "noHighSchool"=noHighSchool, 
                 "HighSchool"=HighSchool, "someCollege"=someCollege, "fourCollege"=fourCollege, "income"=income, "unemployed"=unemployed, 
                 "fullTime"=fullTime, "partTime"=partTime, "ownHome"=ownHome, "newsInt"=newsInt, "ideol"=ideol,
                 "Dem"=Dem, "Rep" = Rep, "Ind"=Ind,
                 "N" = N, "Nstate" = Nstate, "Ncat" = Ncat)

params = c("b.state", "b.1", "b.2", "b.3", "b.4", "b.5", "b.6","b.7","b.8","b.9",
           "b.10", "b.11", "b.12", "b.13", "b.14", "b.15", "b.16","b.17","b.18","b.19",
           "b.20", "b.21", "b.22", "b.23", "cut", "sigma.state")
output = jags(data.jags, inits=NULL, params, model.file="Model1_2016.bug", n.chains=1, n.burnin=20000, n.iter=70000, jags.seed = 0508)


natecon.mcmc = as.mcmc(output) ## 5 thining by default. 

write.table(data.frame(natecon.mcmc[[1]]), "mcmc16_step1Model1")
rm(output); rm(natecon.mcmc)


######## step1Model2 #######
model16 = MCMChlogit(fixed=pvote2 ~ VGood + Good + Bad + VBad +
                       chgUnemp1615 +  avgUnemp16 + chgInc1615 + inc16 +
                       younger30 + older65 + female + black + latino + noHighSchool + HighSchool + someCollege + fourCollege +  
                       income + unemployed + fullTime + partTime + ownHome +
                       Very + Few + ideol + Dem + Rep + Ind,
                     random=~1, group="stateID",
                     data=data, burnin=20000, mcmc=50000, thin=4,verbose=1,
                     seed=0508, beta.start=0, sigma2.start=1,
                     Vb.start=1, mubeta=0, Vbeta=1.0E6,
                     r=1, R=diag(1), nu=0.001, delta=0.001, FixOD=1)

maincoefs = model16$mcmc[, 1:29]

colnames(maincoefs)=c("intercept", "VGood", "Good", "Bad", "VBad", 
                      "chgUnemp1615", "avgUnemp16", "chgInc1615", "inc16",
                      "younger30", "older65", "female", "black", "latino", "noHighSchool", "HighSchool", "someCollege", "fourCollege", "income", 
                      "unemployed", "fullTime", "partTime", "ownHome",
                      "Very", "Few", "ideol",  "Dem", "Rep", "Ind")

write.table(maincoefs, "mcmc16_step1Model2_MainCoeffs.txt")
groupcoefs = model16$mcmc[, 30:80]
write.table(groupcoefs, "mcmc16_step1Model2_GroupCoeffs.txt")
rm(maincoefs); rm(groupcoefs)


######## step2Model1 #######

withmat_mcmc = read.table("mcmc16_step1Model1")
colnames(withmat_mcmc[801:1000,1:23])

mcmcID=NA
for(i in 1:23){ # This is to adjust the order of the colnames of mcmc; the order is NOT 1,2,3,... BUT 1,10,100,101, ...
  ID=str_extract(colnames(withmat_mcmc)[1:23][i], "[0-9]+$") # Extract the digits located at the end of each string 
  mcmcID[i]=as.numeric(ID) 
}
colnames(withmat_mcmc[801:1000,order(mcmcID)]) # Make sure to reorder the columns

newcoeftotal = withmat_mcmc[801:1000,order(mcmcID)]

cut1 = apply(withmat_mcmc[,75:278][,which(str_detect(colnames(withmat_mcmc)[75:278], "1.$"))], 1 , FUN=mean)
cut2 = apply(withmat_mcmc[,75:278][,which(str_detect(colnames(withmat_mcmc)[75:278], "2.$"))], 1 , FUN=mean)
cut3 = apply(withmat_mcmc[,75:278][,which(str_detect(colnames(withmat_mcmc)[75:278], "3.$"))], 1 , FUN=mean)
cut4 = apply(withmat_mcmc[,75:278][,which(str_detect(colnames(withmat_mcmc)[75:278], "4.$"))], 1 , FUN=mean)

newcoeftotal = as.matrix(cbind(newcoeftotal, cut1[801:1000], cut2[801:1000], cut3[801:1000], cut4[801:1000]))


sims = 200 # 200 sets of simulation 

# Simulate potential values that will be used to generate predictions.

Xm25 =cbind(rep(-2.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
            younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
            income, unemployed, fullTime, partTime, ownHome,
            newsInt, ideol, Dem, Rep, Ind)
Xm20 =cbind(rep(-2.0, nrow(data)), avgUnemp16, chgInc1615, inc16,   
            younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
            income, unemployed, fullTime, partTime, ownHome,
            newsInt, ideol, Dem, Rep, Ind)
Xm15 =cbind(rep(-1.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
            younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
            income, unemployed, fullTime, partTime, ownHome,
            newsInt, ideol, Dem, Rep, Ind)
Xm10 =cbind(rep(-1.0, nrow(data)), avgUnemp16, chgInc1615, inc16,   
            younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
            income, unemployed, fullTime, partTime, ownHome,
            newsInt, ideol, Dem, Rep, Ind)
Xm5 =cbind(rep(-.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
           younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
           income, unemployed, fullTime, partTime, ownHome,
           newsInt, ideol, Dem, Rep, Ind)
X0 =cbind(rep(0, nrow(data)), avgUnemp16, chgInc1615, inc16,   
          younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
          income, unemployed, fullTime, partTime, ownHome,
          newsInt, ideol, Dem, Rep, Ind)
X5 =cbind(rep(.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
          younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
          income, unemployed, fullTime, partTime, ownHome,
          newsInt, ideol, Dem, Rep, Ind)
X10 =cbind(rep(1.0, nrow(data)), avgUnemp16, chgInc1615, inc16,   
           younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
           income, unemployed, fullTime, partTime, ownHome,
           newsInt, ideol, Dem, Rep, Ind)
X15 =cbind(rep(1.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
           younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
           income, unemployed, fullTime, partTime, ownHome,
           newsInt, ideol, Dem, Rep, Ind)
X20 =cbind(rep(2.0, nrow(data)), avgUnemp16, chgInc1615, inc16,   
           younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
           income, unemployed, fullTime, partTime, ownHome,
           newsInt, ideol, Dem, Rep, Ind)
X25 =cbind(rep(2.5, nrow(data)), avgUnemp16, chgInc1615, inc16,   
           younger30, older65, female, black, latino, noHighSchool, HighSchool, someCollege, fourCollege,  
           income, unemployed, fullTime, partTime, ownHome,
           newsInt, ideol, Dem, Rep, Ind)

n_obs=nrow(data)

# Use this function to create a matrix of predicted national economic evaluations
assignval=function(c1, c2, c3, c4, val){
  if (val<c1){out= 1}
  else if (val<c2){out = 2}
  else if (val<c3){out = 3}
  else if (val<c4){out = 4}
  else {out= 5}
  out
}

bigmat = matrix(nrow = n_obs, ncol = sims*11) # 11 comes from the number of potential value sets


for (i in 1:sims){
  newcoef = newcoeftotal[i,]
  errs = rnorm(n_obs, mean = 0, sd = 1)
  bigmat[ , (i-1)*11+1] = mapply(rep(newcoef[24], n_obs), # Estimated cut1
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), # Estimated cut2
                                 c3 = rep(newcoef[26], n_obs), # Estimated cut3
                                 c4 = rep(newcoef[27], n_obs), # Estimated cut4
                                 val = Xm25%*%as.matrix(newcoef[1:23]) + errs) 
  # val produces predicted latent values of economic evaluation for every observation: Xm25 is multiplied by the estimated coefficients.
  # And then, function "assignval" compares the magnitude of the predicted latent value for an observation
  # with the four estimated cutpoints that are cut1 from newcoef[24], cut2 from newcoef[25], and so forth.
  # Finally, if the predicted latent value for an observation is smaller than cut1, the "assignval" function assigns 1,
  # else if it is smaller than cut2, the function assigns 2, and so on and on.
  # Consequently, the first column of bigmat includes the predicted economic evaluations (1,2,3,4, or 5) associated with Xm25 and the first estimated coefficient sample out of 200 (the number of sims) that comes from newcoef[1,].
  # And, the twelfth column of bigmat includes the predicted economic evaluations (1,2,3,4, or 5) associated with Xm25 and the second estimated coefficient sample out of 200 (the number of sims).
  # In the same vein, the second column of bigmat is associated with Xm20 and the first estimated coefficient sample.
  # And, the thirteenth column of bigmat is associated with Xm20 and the second estimated coefficient sample.
  bigmat[ , (i-1)*11+2] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = Xm20%*%as.matrix(newcoef[1:23]) + errs)  
  bigmat[ , (i-1)*11+3] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = Xm15%*%as.matrix(newcoef[1:23]) + errs) 
  bigmat[ , (i-1)*11+4] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = Xm10%*%as.matrix(newcoef[1:23]) + errs) 
  bigmat[ , (i-1)*11+5] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = Xm5%*%as.matrix(newcoef[1:23]) + errs) 
  bigmat[ , (i-1)*11+6] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = X0%*%as.matrix(newcoef[1:23]) + errs)  
  bigmat[ , (i-1)*11+7] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = X5%*%as.matrix(newcoef[1:23]) + errs)  
  bigmat[ , (i-1)*11+8] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = X10%*%as.matrix(newcoef[1:23]) + errs) 
  bigmat[ , (i-1)*11+9] = mapply(rep(newcoef[24], n_obs), 
                                 FUN = assignval, 
                                 c2 = rep(newcoef[25], n_obs), 
                                 c3 = rep(newcoef[26], n_obs),
                                 c4 = rep(newcoef[27], n_obs),
                                 val = X15%*%as.matrix(newcoef[1:23]) + errs) 
  bigmat[ , (i-1)*11+10] = mapply(rep(newcoef[24], n_obs), 
                                  FUN = assignval, 
                                  c2 = rep(newcoef[25], n_obs), 
                                  c3 = rep(newcoef[26], n_obs),
                                  c4 = rep(newcoef[27], n_obs),
                                  val = X20%*%as.matrix(newcoef[1:23]) + errs)  
  bigmat[ , (i-1)*11+11] = mapply(rep(newcoef[24], n_obs), 
                                  FUN = assignval, 
                                  c2 = rep(newcoef[25], n_obs), 
                                  c3 = rep(newcoef[26], n_obs),
                                  c4 = rep(newcoef[27], n_obs),
                                  val = X25%*%as.matrix(newcoef[1:23]) + errs) 
  if (i %% 20 == 0) print(i)
}
write.table(bigmat, "step1_simNatEcon16.txt")
rm(bigmat)


######## step2Model2 and Computing the effects #######

df=data
df$IDnum = 1:nrow(df)
grouponly=subset(df, select = c(stateID, IDnum))

## Import the estimated parameters for Model2 produced by step1Model2.R
maincoefs = read.table("mcmc16_step1Model2_MainCoeffs.txt")
groupcoefs = read.table("mcmc16_step1Model2_GroupCoeffs.txt") 
maincoefs = maincoefs[12301:12500,]
groupcoefs = groupcoefs[12301:12500,]
groupcoefsID=NA
for(i in 1:ncol(groupcoefs)){ # This is to adjust the order of the colnames of groupcoefs; the order is NOT 1,2,3,... BUT 1,10,100,101, ...
  ID=str_extract(colnames(groupcoefs)[i], "[0-9]+$") # Extract the digits located at the end of each string 
  groupcoefsID[i]=as.numeric(ID) 
}

## Import the simulated potential mediators produced by step2Model1.R
simMediators=read.table("step1_simNatEcon16.txt")

# This is a function that converts simulated national econ evaluations to a matrix
convertEval=function(retnum){
  if (is.na(retnum)){out = c(NA, NA, NA, NA, NA)}
  else if (retnum==1){out=as.vector(c(1, 1, 0, 0, 0))}
  else if (retnum==2){out=as.vector(c(1, 0, 1, 0, 0))}
  else if (retnum==3){out=as.vector(c(1, 0, 0, 0, 0))}
  else if (retnum==4){out=as.vector(c(1, 0, 0, 1, 0))}
  else if (retnum==5){out=as.vector(c(1, 0, 0, 0, 1))}
  out
}


################ NOTE ################################################
# Depending on the size of simMediators, it may take long to run below
holder1 = apply(simMediators[1:5000,], FUN = convertEval, MARGIN = c(1, 2))
holder2 = apply(simMediators[5001:10000,], FUN = convertEval, MARGIN = c(1, 2))
holder3 = apply(simMediators[10001:15000,], FUN = convertEval, MARGIN = c(1, 2))
holder4 = apply(simMediators[15001:20000,], FUN = convertEval, MARGIN = c(1, 2))
holder5 = apply(simMediators[20001:25000,], FUN = convertEval, MARGIN = c(1, 2))
holder6 = apply(simMediators[25001:30000,], FUN = convertEval, MARGIN = c(1, 2))
holder7 = apply(simMediators[30001:35000,], FUN = convertEval, MARGIN = c(1, 2))
holder8 = apply(simMediators[35001:nrow(simMediators),], FUN = convertEval, MARGIN = c(1, 2))
holder = abind(holder1, holder2, holder3, holder4, holder5, holder6, holder7, holder8,along = 2)
dim(holder)

source("step2Model2ComputingEff_2016.R") 
avg.eff = step2Model2ComputingEff_2016(sims=sims)


write.table(avg.eff, "output_unemp16")
